package yao.spider.jsoup.xiangrikui;



import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import yao.common.excel.ExcelCreator;
import yao.common.file.FileUtils;

/**
 * Spider that re-reads previously failed URLs from files so they can be fetched again.
 *
 * <p>The URLs are collected from every regular file directly inside {@code path} whose
 * name matches {@code fileMatchExpression}. What happens to the URLs afterwards is
 * decided by the parent class {@link XiangRiKuiSpider}.
 *
 * @author chenyao
 * @date 2016-11-28 18:22:38
 */
public class XiangRiKuiReplenishSpider extends XiangRiKuiSpider {

	/** Directory that is scanned for files containing URLs. */
	private final String path;
	/** Glob expression (as understood by {@link Files#newDirectoryStream(Path, String)}) selecting the files to read. */
	private final String fileMatchExpression;

	/**
	 * Creates a spider that takes its URLs from files on disk.
	 *
	 * @param errorFilePath       passed through unchanged to the parent constructor
	 * @param excelCreator        passed through unchanged to the parent constructor
	 * @param path                directory to scan for URL files
	 * @param fileMatchExpression glob expression the file names must match
	 */
	protected XiangRiKuiReplenishSpider(String errorFilePath, ExcelCreator excelCreator, String path, String fileMatchExpression) {
		super(errorFilePath, excelCreator);
		this.path = path;
		this.fileMatchExpression = fileMatchExpression;
	}

	/**
	 * Collects the contents of every matching file in the configured directory.
	 *
	 * <p>Entries are appended in directory-iteration order; each file contributes whatever
	 * {@code FileUtils.readByUTF8} returns for it (presumably one URL per line - TODO confirm).
	 * Matched entries that are not regular files (e.g. sub-directories) are skipped.
	 *
	 * @return all URLs read from the matching files; empty when nothing matches
	 * @throws RuntimeException wrapping the {@link IOException} if the directory cannot be
	 *                          listed or a file cannot be read
	 */
	@Override
	protected List<String> getUrls() {
		List<String> results = new ArrayList<>();
		Path dir = Paths.get(this.path);
		try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, this.fileMatchExpression)) {
			for (Path entry : stream) {
				// The glob only filters by name, so it may also match directories;
				// those cannot be read as text and must not abort the whole scan.
				if (!Files.isRegularFile(entry)) {
					continue;
				}
				results.addAll(FileUtils.readByUTF8(entry));
			}
		} catch (IOException e) {
			// Rethrow with context and the original cause; the caller decides how to
			// report it, so the stack trace is not additionally printed here.
			throw new RuntimeException(
					"Failed to read URL files from '" + dir + "' matching '" + this.fileMatchExpression + "'", e);
		}
		return results;
	}

}
